# coding=utf-8
# -*- coding: UTF-8 -*-
#
# This file is part of the kernelstudio package.
#
# (c) 2014-2025 zlin <admin@kernelstudio.com>
#
# For the full copyright and license information, please view the LICENSE file
# that was distributed with this source code.
import typing as t

from ksai.document.context import DocumentSection
from ksai.document.extractor.extractor import AbstractDocumentExtractor
from ksai.document.ocr.factory import ocr_factory
from ksai.persistence.storage.attachment import Attachment


class ImageOcrDocumentExtractor(AbstractDocumentExtractor):
    """Document extractor that delegates text extraction to ``ocr_factory``.

    It declares the ``jpg``/``jpeg``/``png`` extensions and the
    ``image/jpeg``/``image/png`` MIME types.
    """

    def __init__(self):
        """Initialise the extractor; only the base initialiser is run."""
        super(ImageOcrDocumentExtractor, self).__init__()

    def do_extract(self, attachment: Attachment) -> t.List[DocumentSection]:
        """Pass ``attachment`` to ``ocr_factory`` and wrap the result.

        :param attachment: the attachment handed to ``ocr_factory.execute``.
        :return: a one-element list holding a ``DocumentSection`` whose
            ``text`` is whatever ``ocr_factory.execute`` returned.
        """
        # NOTE(review): the result of ``ocr_factory.execute`` is forwarded
        # unchanged; whether it can be None or empty is not visible here.
        section = DocumentSection(text=ocr_factory.execute(attachment))
        return [section]

    def extensions(self) -> t.List[str]:
        """Return the file extensions (without a leading dot) declared here."""
        return [
            "jpg",
            "jpeg",
            "png",
        ]

    def mimes(self) -> t.List[str]:
        """Return the MIME types declared by this extractor."""
        return [
            "image/jpeg",
            "image/png",
        ]
